In [80]:
# All imports
from glob import glob
from tqdm import tqdm

import cv2
import matplotlib.pyplot as plt
import numpy as np

__DEBUG__ = False

Utility methods

In [2]:
def imgread_rgb(path):
    return cv2.cvtColor(cv2.imread(path),cv2.COLOR_BGR2RGB)
In [3]:
def plot_many(imgs,titles,shape,cmaps=None,figsize=(24,9)):
    
    assert len(imgs) == len(titles) and len(imgs) == shape[0]*shape[1]
    
    fig, axes = plt.subplots(shape[0],shape[1],figsize=figsize)
    axes = axes.flatten()
    
    for i in range(len(imgs)):
        axes[i].imshow(imgs[i],cmap=cmaps[i] if cmaps is not None else None)
        axes[i].set_title(titles[i])

    plt.show()
In [4]:
def draw_polygon(img,pts,color=[255,0,0],thickness=5,isClosed=True):
    n = len(pts)
    try:
        for i in range(n-1):
            cv2.line(img,pts[i],pts[i+1],color,thickness)

        if isClosed:
            cv2.line(img,pts[n-1],pts[0],color,thickness)
    except Exception:
        print("Error trying to draw line {} {}".format(pts[i],pts[i+1]))
        raise
In [5]:
def overlay_polygon(img,pts,alpha=1.,beta=1.,gamma=0.):
    overlay = np.zeros_like(img)    
    draw_polygon(overlay,pts)
    return cv2.addWeighted(img,alpha,overlay,beta,gamma)

Camera Calibration

The camera was calibrated using chessboard images provided by Udacity. Object points are the integer coordinates of the internal corners of an ideal chessboard. The x coordinates of the object points lie in the range [0,8] and the y coordinates in the range [0,5]. The pixel coordinates of the same corners are detected by findChessboardCorners(), and camera calibration is then performed with calibrateCamera(). The required number of corners was found in 17 out of 20 images, and these were used to calibrate the camera.
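
As a small illustration of the object-point layout (using a hypothetical 3×2 corner grid rather than the real 9×6 one), the meshgrid construction used below lays out corner coordinates left to right, top to bottom:

In [ ]:
# Illustration only: object points for a hypothetical 3x2 corner grid.
xs, ys = np.meshgrid(range(3), range(2))
pts = np.dstack((xs, ys, np.zeros((2, 3)))).reshape(-1, 3)
print(pts)
# -> rows (0,0,0), (1,0,0), (2,0,0), (0,1,0), (1,1,0), (2,1,0)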

In [20]:
# Function for performing camera calibration

def calibrate_camera(calib_img_dir,nb_x_corners,nb_y_corners):
    
    calib_img_paths = glob(calib_img_dir+'*.jpg')

    # Object points are 3-D integral coordinates of all internal corners
    # in a Chessboard image. The z coordinates are all zero.

    # The object points matrix has as many columns as there are x positions
    # and as many rows as there are y positions.

    x_coords,y_coords = np.meshgrid(range(nb_x_corners),range(nb_y_corners))
    z_coords = np.zeros(shape=[nb_y_corners,nb_x_corners])

    obj_points = np.dstack((x_coords,y_coords,z_coords))

    # Reshape the matrix of coordinates into a list that has all coordinates
    # from left to right and top to bottom.
    obj_points = obj_points.reshape([-1,3])

    obj_points_list = []
    img_points_list = []

    detected = 0

    for path in tqdm(calib_img_paths):
        img = imgread_rgb(path)
        grey = cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)
        ret, corners = cv2.findChessboardCorners(grey,(nb_x_corners,nb_y_corners))

        if ret == True:
            obj_points_list.append(obj_points)
            img_points_list.append(corners)

            detected += 1
            
    # Reshape object points and image points as required by the new OpenCV interface
    obj_points_matrix = np.array(obj_points_list).reshape([detected,-1,3])
    img_points_matrix = np.array(img_points_list).reshape([detected,-1,2])
    
    # Perform actual calibration using our object points and 
    # image points found above.
    test_img = imgread_rgb('camera_cal/calibration2.jpg')
    retval, cameraMatrix, distCoeffs, rvecs, tvecs =\
    cv2.calibrateCamera(obj_points_matrix.astype(np.float32),
                        img_points_matrix.astype(np.float32),
                        (test_img.shape[1],test_img.shape[0]),
                        None,
                        None)
    
    if __DEBUG__:
        grey = cv2.cvtColor(test_img,cv2.COLOR_RGB2GRAY)
        ret, corners = cv2.findChessboardCorners(grey,(nb_x_corners,nb_y_corners))
        cv2.drawChessboardCorners(test_img,(nb_x_corners,nb_y_corners),corners,ret)
        plt.imshow(test_img)
        plt.show()
    
    return retval, cameraMatrix, distCoeffs, rvecs, tvecs
In [21]:
# Perform camera calibration.

calib_img_dir = './camera_cal/'

# Number of internal corners in the calibration images.
# Obtained by manual inspection.
nb_x_corners = 9
nb_y_corners = 6

retval, cameraMatrix, distCoeffs, rvecs, tvecs = calibrate_camera(calib_img_dir,nb_x_corners,nb_y_corners)
100%|██████████| 20/20 [00:04<00:00,  4.00it/s]

Testing Undistortion

A sample image was undistorted using the camera matrix and distortion coefficients found above.

In [22]:
# Test distortion correction.
def test_undistortion(img_path):
    test_img = imgread_rgb(img_path)
    undistorted = cv2.undistort(test_img,cameraMatrix,distCoeffs)
    plot_many([test_img,undistorted],['original','undistorted'],[1,2])
    
if __DEBUG__ == True:
    test_undistortion('camera_cal/calibration1.jpg')

Gradient Masks

The cells below define functions for different types of masks based on thresholding the Sobel transforms of an image.

In [29]:
def get_thresholds():
    return {'x':(50,180), 'y':(50,180), 'mag':(80,150),'ang':(0.85,1.0),'s':(170,255),'l':(50,250)}
In [30]:
def get_abs_sobel_mask(img, orient='x', kernel=3, thresh=(0,255)):
    
    grey = cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)
    
    if orient == 'x':
        abs_sobel = np.abs(cv2.Sobel(grey,cv2.CV_64F,1,0,None,kernel))
    elif orient == 'y':
        abs_sobel = np.abs(cv2.Sobel(grey,cv2.CV_64F,0,1,None,kernel))
    else:
        raise Exception("Invalid orientation {}".format(orient))
     
    # Scale to [0,255] so that thresholds are comparable across images.
    scaled_abs_sobel = (255. * abs_sobel / np.max(abs_sobel)).astype(np.uint8)
    
    binary_mask = np.logical_and(scaled_abs_sobel >= thresh[0], scaled_abs_sobel <= thresh[1])
    
    return binary_mask.astype(np.uint8)
In [31]:
def get_mag_sobel_mask(img, kernel=3, mag_thresh=(0,255)):
    
    grey = cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)
    
    abs_sobel_x = np.abs(cv2.Sobel(grey,cv2.CV_64F,1,0,None,kernel))
    abs_sobel_y = np.abs(cv2.Sobel(grey,cv2.CV_64F,0,1,None,kernel))
    abs_sobel_mag = np.sqrt(abs_sobel_x**2+abs_sobel_y**2)
    
    scaled_abs_sobel_mag = (abs_sobel_mag * 255.0 / np.max(abs_sobel_mag)).astype(np.uint8)
    
    binary_mask = np.logical_and(scaled_abs_sobel_mag >= mag_thresh[0], 
                                 scaled_abs_sobel_mag <= mag_thresh[1])
    
    return binary_mask.astype(np.uint8)
In [32]:
def get_ang_sobel_mask(img, kernel=3, ang_thresh=(0,np.pi/2.)):
    
    grey = cv2.cvtColor(img,cv2.COLOR_RGB2GRAY)
    
    abs_sobel_x = np.abs(cv2.Sobel(grey,cv2.CV_64F,1,0,None,kernel))
    abs_sobel_y = np.abs(cv2.Sobel(grey,cv2.CV_64F,0,1,None,kernel))
    
    
    abs_sobel_angle = np.arctan2(abs_sobel_y,abs_sobel_x)
    
    
    binary_mask = np.logical_and(abs_sobel_angle >= ang_thresh[0], 
                                 abs_sobel_angle <= ang_thresh[1])
    
    return binary_mask.astype(np.uint8)
In [33]:
def get_sobel_masks(img,mask_names=['x','y','mag','ang'],threshold_params=None):
   
    masks = []
    if 'x' in mask_names:
        x_mask = get_abs_sobel_mask(img,orient='x',kernel=3,thresh=threshold_params['x'])
        masks.append(x_mask)
    
    if 'y' in mask_names:
        y_mask = get_abs_sobel_mask(img,orient='y',kernel=3,thresh=threshold_params['y'])
        masks.append(y_mask)
        
    if 'mag' in mask_names:
        mag_mask = get_mag_sobel_mask(img,kernel=3,mag_thresh=threshold_params['mag'])
        masks.append(mag_mask)
        
    if 'ang' in mask_names:
        ang_mask = get_ang_sobel_mask(img,kernel=15,ang_thresh=threshold_params['ang'])
        masks.append(ang_mask)
    
    return masks

Channel Masks

The function below returns a function that generates a color mask by thresholding any one of the R, G, B, H, L or S channels.

In [37]:
def get_channel_mask_fn(chan_name:str):
    ''' 
    Returns a function that generates a binary mask from an RGB image.
    The returned function is configured based on the chan_name passed here.
    If chan_name is 'h', 'l' or 's', the returned function automatically
    converts the image to HLS before thresholding.
    '''

    
    name_to_index = { 'b':2, 'g':1, 'r':0, 'h':0, 'l':1, 's':2}
    
    if chan_name not in name_to_index.keys():
        raise Exception('{} is not a valid channel name'.format(chan_name))
        
    def no_conversion(img):
        return img
        
    color_conversion_fn = no_conversion if chan_name in ['b','g','r'] else (lambda img: cv2.cvtColor(img,cv2.COLOR_RGB2HLS))
    
    def _f(img,thresh=(0,255)):
        '''
        img: RGB image. Conversion to HLS is performed automatically if needed.
        '''
        img_converted = color_conversion_fn(img)
        channel = img_converted[:,:,name_to_index[chan_name]]
        binary_mask = (np.logical_and(channel >= thresh[0], channel <= thresh[1])).astype(np.uint8)
    
        return binary_mask
    
    return _f
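
A short usage sketch (the thresholds here are illustrative):

In [ ]:
# Usage sketch: saturation-channel mask for one of the test images.
if __DEBUG__ == True:
    img = imgread_rgb('test_images/test2.jpg')
    s_mask = get_channel_mask_fn('s')(img, thresh=(170, 255))
    plt.imshow(s_mask, cmap='gray')
    plt.show()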

Visualizing Mask Combinations

The function below visualizes the output of the different mask combinations that were tried during the early phase of the project. It can be run after enabling the __DEBUG__ flag in the first cell. Below, the output of the function can be seen for a single arbitrary test image.

In [44]:
# Visualize individual and combined Sobel masks for all test images.
def visualize_candidate_masks(img_paths,group=0):

    s_mask_fn = get_channel_mask_fn('s')
    h_mask_fn = get_channel_mask_fn('h')
    l_mask_fn = get_channel_mask_fn('l')

    for path in img_paths:
        img_rgb = imgread_rgb(path)

        [x_mask,y_mask,mag_mask,ang_mask] = get_sobel_masks(img_rgb,threshold_params=get_thresholds())
        x_y_mask = x_mask & y_mask
        mag_ang_mask = mag_mask & ang_mask

        all_mask1 = x_y_mask | mag_ang_mask

        s_mask = s_mask_fn(img_rgb,thresh=(80,255))
        l_mask = l_mask_fn(img_rgb,thresh=(127,255))
        
        # Experimental "white pixel" mask; kept from exploration but unused below.
        w_mask = ~s_mask_fn(img_rgb,thresh=(170,100)) & ~l_mask_fn(img_rgb,thresh=(0,80))

        x_s_mask = x_mask | s_mask
        y_s_mask = y_mask & s_mask
        x_y_s_mask = x_s_mask & y_s_mask

        mag_s_mask = mag_mask | s_mask
        ang_s_mask = ang_mask | s_mask
        mag_ang_s_mask = mag_s_mask | ang_s_mask

        all_mask2 = x_y_s_mask | mag_ang_s_mask

    
        if group == 0:
            plot_many([img_rgb,x_mask,y_mask,x_y_mask,
                       mag_mask,ang_mask,mag_ang_mask,all_mask1,
                       s_mask,x_s_mask,y_s_mask,x_y_s_mask,
                       mag_s_mask,ang_s_mask,mag_ang_s_mask,all_mask2],
                      ['original','x','y','(x&y)',
                       'magnitude','angle','(mag&ang)','All1',
                       's','(s|x)','(s&y)','(s|x)&(s&y)', 
                       '(s|mag)','(s|ang)','(s|mag)|(s|ang)','All2'],
                      [4,4],figsize=(24,18))
            
        elif group == 1:

            plot_many([img_rgb,x_s_mask,mag_s_mask,
                       ang_s_mask,(x_s_mask|mag_s_mask|ang_s_mask),((s_mask&x_mask)|(s_mask&y_mask)|mag_mask|s_mask&ang_mask)],
                      ['original','(s|x)','(s|mag)',
                       '(s|ang)','(s|x)|(s|mag)|(s|ang)','((s&x)|(s&y))|mag|(s&ang)'],
                      [2,3])
            
        elif group == 2:
            plot_many([img_rgb,mag_ang_s_mask],
                      ['original','(s|mag)|(s|ang)'],
                      [1,2])
            
        elif group == 3:
            plot_many([img_rgb,x_mask,y_mask,mag_mask,
                       ang_mask,s_mask,l_mask,s_mask&ang_mask],
                      [path,'x','y','mag',
                       'ang','s','l','s&ang'],
                      [2,4])
            
        elif group == 4:
            plot_many([img_rgb,x_mask&s_mask,x_mask&l_mask],
                      [path,'x&s','x & l'],
                      [1,3])
            
        elif group == 5:
            plot_many([img_rgb,get_lane_mask(img_rgb,get_thresholds())],
                      [path,'lane'],
                      [1,2])


if __DEBUG__ == True:
    visualize_candidate_masks(glob('test_images/test2.jpg'),3)

Selecting a Lane Mask

The function below combines the different binary masks to isolate lane pixels. The exact combination formula was arrived at using both intuition and trial and error. Here are a few points that were considered.

  1. X and Y Sobel masks always identify edges belonging to lane markings but also include other edges.
  2. A Sobel angle mask has dense regions where yellow markings are present but also contains uniform noise.
  3. The S channel highlights the lane markings clearly if images are well saturated.
  4. The L channel can be combined with the S channel to exclude edges from low-lit areas.
In [45]:
def get_lane_mask(img_rgb,thresh_params):
    mag_mask,ang_mask = get_sobel_masks(img_rgb,['mag','ang'],thresh_params)
    
    get_s_mask = get_channel_mask_fn('s')
    get_l_mask = get_channel_mask_fn('l')
    
    s_mask = get_s_mask(img_rgb,thresh=thresh_params['s'])
    l_mask = get_l_mask(img_rgb,thresh=thresh_params['l'])
    
    
    mag_s_mask = mag_mask | s_mask
    ang_s_mask = ang_mask & s_mask
    mag_ang_s_mask = mag_s_mask | ang_s_mask
    mag_ang_s_l_mask = mag_ang_s_mask & l_mask
    
    return mag_ang_s_l_mask 

Visualizing Final Lane Mask

The mask selected above can be visualized for all test images using the visualization function defined earlier.

In [47]:
visualize_candidate_masks(glob('test_images/*.jpg'),5)

Perspective Warping

Perspective warping lets us transform an image so that one can visualize how it would look when taken from a different position. In this case we want a top view, or bird's eye view, of the road. The function get_perspective_transforms() returns the matrices required for the perspective and inverse perspective transforms. The function warp() applies a transform using these matrices.

The reference image used for deriving the transform matrices is plotted below. The coordinates used for finding the transform were obtained manually from the same image. A rectangle is overlaid on top of the image showing the effect of the perspective transform. Another image with curved lanes was also tested to verify that lanes remain parallel after the transform.

In [52]:
def get_perspective_transforms(src_points, dst_points):
    M = cv2.getPerspectiveTransform(np.float32(src_points),np.float32(dst_points))
    Minv = cv2.getPerspectiveTransform(np.float32(dst_points),np.float32(src_points))
    
    return M,Minv
In [53]:
def warp(img,M):
    img_size = (img.shape[1],img.shape[0])
    return cv2.warpPerspective(img,M,img_size,flags=cv2.INTER_LINEAR)
    
In [54]:
# Reference image.
perspective_ref_img_path = 'test_images/straight_lines1.jpg'
# Points selected by manual inspection of the reference image, in the order
# bottom left -> bottom right -> top right -> top left.
perspective_src_points = [(203,720),(1099,720),(707,463),(580,463)]
perspective_dst_points = [(203,720),(1099,720),(1099,0),(203,0)]


# Find and apply perspective transform.
M,Minv = get_perspective_transforms(perspective_src_points,perspective_dst_points)
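
As a quick sanity check, each source point should map onto its corresponding destination point under M:

In [ ]:
# Optional sanity check: the top-right source point should land on ~(1099, 0).
pt = np.float32([[perspective_src_points[2]]])   # (707, 463)
print(cv2.perspectiveTransform(pt, M))           # -> approx [[[1099., 0.]]]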
In [55]:
## Setup and test perspective transform.
def test_perspective_transform(ref_img_path,test_img_path,src_points,dst_points):
    # Load image of straight lane lines and apply undistortion.
    img_straight_rgb = imgread_rgb(ref_img_path)
    img_straight_rgb = cv2.undistort(img_straight_rgb,cameraMatrix,distCoeffs)


    img_straight_rgb_warped = warp(img_straight_rgb,M)

    # Test on the reference image
    plot_many([overlay_polygon(img_straight_rgb,src_points),overlay_polygon(img_straight_rgb_warped,dst_points)],
              ['original','warped'],
              [1,2])

    # Test on curved lane image
    curved_image = imgread_rgb(test_img_path)
    curved_image = cv2.undistort(curved_image,cameraMatrix,distCoeffs)
    transformed_image = warp(curved_image,M)
    plot_many([curved_image,transformed_image],['original','warped'],[1,2])
    
if __DEBUG__ == True:   
    test_perspective_transform(perspective_ref_img_path,'test_images/test5.jpg',
                               perspective_src_points,perspective_dst_points)

Lane Search and Curve Fitting

The function find_lanes() finds pixels that potentially belong to lane markings. It takes the binary mask image obtained above and returns the coordinates of the points belonging to the lane markings. The second parameter to this function is a pair of polynomials. If the polynomials are given, lanes are searched only in a region close to the lines described by them. If this parameter is None, the bases of the lane markings are detected using a histogram of bright pixels in the lower half of the binary mask, and the remaining lane pixels are found by sliding a pair of windows upwards. Each window is horizontally recentered to the mean x position of the points found in the previous window.
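
The histogram-based base detection can be illustrated on a small synthetic mask (the mask below is fabricated for illustration only):

In [ ]:
# Toy illustration of base detection on a synthetic binary mask.
mask = np.zeros((720, 1280), dtype=np.uint8)
mask[360:, 300:310] = 1   # fake left lane marking
mask[360:, 950:960] = 1   # fake right lane marking

histogram = np.sum(mask[mask.shape[0] // 2:, :], axis=0)
mid_col = histogram.shape[0] // 2
left_base = np.argmax(histogram[:mid_col])
right_base = mid_col + np.argmax(histogram[mid_col:])
print(left_base, right_base)   # -> 300 950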

In [56]:
def find_lanes(binary_image,prev_fits = None, debug=False):

    if debug == True:
        debug_img = np.dstack((binary_image,binary_image,binary_image)) * 255
    
    # Windowed search params.
    nb_windows = 9
    window_half_width = 100
    
    # nonzero() returns row and column indices
    # convert to x and y coordinates.
    nonzero_y = np.nonzero(binary_image)[0]
    nonzero_x = np.nonzero(binary_image)[1]
    
    recenter_threshold = 50

    
    def find_lane_bases():
        # Find hot columns in the lower half of the image.
        mid_row = int(binary_image.shape[0] / 2)
        histogram = np.sum(binary_image[mid_row:,:],axis=0)

        mid_col = int(histogram.shape[0] / 2)

        # Return indices of hottest columns in the left and right halves. 
        return np.argmax(histogram[:mid_col]), mid_col + np.argmax(histogram[mid_col:])

    
    def search_lane(x_center,hot_pixel_color=None):
        window_height = int(binary_image.shape[0]/nb_windows)
        
        search_window_corners = []
        lane_x_coordinates = []
        lane_y_coordinates = []
        
        for i in range(nb_windows):
            window_x_lo = x_center - window_half_width
            window_x_hi = x_center + window_half_width
            
            # In image coordinates y increases from top to bottom, while the
            # windows slide upwards from the bottom of the image. So y_lo is
            # the numerically lower, top edge of the window and y_hi is the
            # numerically higher, bottom edge.
            window_y_lo = binary_image.shape[0] - (i+1)*window_height
            window_y_hi = window_y_lo + window_height
            
            # Add window coordinates to the list.
            search_window_corners.append([(window_x_lo,window_y_lo),(window_x_hi,window_y_hi)])
            
            # Find 'ON' pixels that lie within the current window.
            is_on = ( (nonzero_x >= window_x_lo) & (nonzero_x <= window_x_hi) ) &\
                    ( (nonzero_y >= window_y_lo) & (nonzero_y <= window_y_hi) )
                
            on_x = nonzero_x[is_on]
            on_y = nonzero_y[is_on]
            
            if debug == True:
                debug_img[on_y,on_x,:] = hot_pixel_color
                cv2.rectangle(debug_img,(window_x_lo,window_y_lo),(window_x_hi,window_y_hi),[0,0,255],5)
            
            lane_x_coordinates.append(on_x)
            lane_y_coordinates.append(on_y)
            
            # If 'ON' pixel count is above threshold, recenter the window (horizontally).
            if np.sum(is_on) >= recenter_threshold:
                x_center = int(np.mean(on_x))

            
        
        lane_x_coordinates = np.concatenate(lane_x_coordinates)
        lane_y_coordinates = np.concatenate(lane_y_coordinates)
        
        return search_window_corners,(lane_x_coordinates,lane_y_coordinates)
    
    
    def probe_nearby(prev_fit,hot_pixel_color=None):
        
        # We already have the coordinates of all ON pixels in
        # nonzero_y[i] and nonzero_x[i]. For every nonzero_y[i]
        # get the predicted value x_predicted[i]. 
        x_predicted = prev_fit[0] * nonzero_y**2 + prev_fit[1] * nonzero_y + prev_fit[2]
        
        # For every nonzero_x[i] check if it's close enough to the
        # predicted x_predicted[i].
        is_x_in_vicinity = (nonzero_x >= x_predicted - window_half_width) &\
                           (nonzero_x <= x_predicted + window_half_width)

        lane_x_coordinates = nonzero_x[is_x_in_vicinity]
        lane_y_coordinates = nonzero_y[is_x_in_vicinity]
        
        return lane_x_coordinates, lane_y_coordinates
        
               
    
    # Search using the histogram and sliding windows if no prior lane position is known.
    if prev_fits is None:
        if __DEBUG__ == True:
            print("Performing sliding window search.")
            
        left_lane_base_x, right_lane_base_x = find_lane_bases()
        
        left_lane_info  = search_lane(left_lane_base_x,hot_pixel_color=[255,0,0])
        right_lane_info = search_lane(right_lane_base_x,hot_pixel_color=[0,255,0])
        
    else:
        if __DEBUG__ == True:
            print("Performing targeted search.")
            
        left_lane_coords  = probe_nearby(prev_fits[0],hot_pixel_color=[255,0,0])
        right_lane_coords = probe_nearby(prev_fits[1],hot_pixel_color=[0,255,0])
        
        # Return empty lists of window corners along with the lane coordinates.
        left_lane_info  = ([],left_lane_coords )
        right_lane_info = ([],right_lane_coords)
    
    if debug == True:
        plt.imshow(debug_img)
        plt.show()
    
    
    return left_lane_info,right_lane_info
                
   

Fitting a Polynomial

A quadratic polynomial is fitted to the coordinates of the lane pixels returned by find_lanes() above. Note that x is fitted as a function of y, since the lane lines in the warped view are nearly vertical. The polynomial can then be used to perform calculations that cannot be done on raw pixel coordinates.

In [60]:
def fit_poly_to_lane(x_pts,y_pts):
    return np.polyfit(y_pts,x_pts,2)
In [61]:
def eval_quadratic(coeffs,pts):
    return (coeffs[0] * pts**2 + coeffs[1] * pts + coeffs[2]).astype(np.int32)

Testing Lane Detection

The code below plots the output of the lane search logic described above for a few sample images.

In [64]:
def test_lane_detection(img_rgb_path):
    test_img_rgb = imgread_rgb(img_rgb_path)
    
    undistort_rgb = cv2.undistort(test_img_rgb,cameraMatrix,distCoeffs)
    binary_image = get_lane_mask(undistort_rgb,get_thresholds())
    
    perspective_corrected = warp(binary_image,M) 
    
    left_lane_info,right_lane_info = find_lanes(perspective_corrected)
    
    left_lane_windows = left_lane_info[0]
    right_lane_windows = right_lane_info[0]
    
    left_lane_x,left_lane_y = left_lane_info[1]
    right_lane_x, right_lane_y = right_lane_info[1]
    
    left_poly = fit_poly_to_lane(left_lane_x,left_lane_y)
    right_poly = fit_poly_to_lane(right_lane_x,right_lane_y)
    
    ys = np.array(range(perspective_corrected.shape[0]),dtype=np.int32)
    xs_left  = eval_quadratic(left_poly,ys)
    xs_right = eval_quadratic(right_poly,ys)
  
    search_area_img = np.dstack((perspective_corrected,perspective_corrected,perspective_corrected)) * 255
    
    for window in left_lane_windows:
        cv2.rectangle(search_area_img,window[0],window[1],color=[255,0,0],thickness=5)
    
    for window in right_lane_windows:
        cv2.rectangle(search_area_img,window[0],window[1],color=[255,0,0],thickness=5)
     
    left_points = [(x,y) for (x,y) in zip(xs_left,ys)]
    right_points  = [(x,y) for (x,y) in zip(xs_right,ys)]
    all_points = [pt for pt in left_points]
    all_points.extend([pt for pt in reversed(right_points)])
       
    #cv2.fillPoly(undistort_rgb,np.array([all_points],dtype = np.int32),[255,255,0])
    draw_polygon(search_area_img,left_points,color=[0,255,0],isClosed=False,thickness=10)
    draw_polygon(search_area_img,right_points,color=[0,0,255],isClosed=False,thickness=10)
    
    # Unwarp
    unwarp = warp(search_area_img,Minv)
    final = cv2.addWeighted(undistort_rgb,0.5,unwarp,1.0,0.0)
               
    plot_many([test_img_rgb,undistort_rgb,binary_image,perspective_corrected,search_area_img,final],
          ['input','undistorted','lane mask','perspective corrected mask','search windows','final'],
          [2,3],
          [None,None,'gray','gray',None,None]) 
    
if __DEBUG__ == True:
    for path in glob('test_images/*.jpg'):
        test_lane_detection(path)   
Performing sliding window search.
Performing sliding window search.
Performing sliding window search.
Performing sliding window search.
Performing sliding window search.
Performing sliding window search.
Performing sliding window search.
Performing sliding window search.

Curvature and Displacement

The radius of curvature of the road is calculated by averaging the radii of the two lane lines. The curvature calculation cannot be performed on the polynomial fitted above, because that polynomial was fitted to pixel positions, which have to be rescaled by different factors in the horizontal and vertical directions. find_curvature() rescales the pixel coordinates as needed, fits a new polynomial, and calculates the curvature using the standard formula.
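
For a quadratic fit x = Ay^2 + By + C evaluated at position y, the formula implemented in find_curvature() is

$$R(y) = \frac{\left(1 + (2Ay + B)^2\right)^{3/2}}{\left|2A\right|}$$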

The scaling factors for the horizontal and vertical directions were derived from the output of the reference image used for the perspective transform. The width of the lane and the height of the visible stretch of road in pixels are obtained from the bird's eye view plotted there.
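
With the destination points defined earlier, the lane width is |1099 - 203| = 896 pixels and the visible road height is 720 pixels. Taking a standard 3.7 m lane width and roughly 30 m of visible road, this gives xm_per_pix = 3.7/896 ≈ 0.0041 m/px and ym_per_pix = 30/720 ≈ 0.042 m/px, as computed in the cell below.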

Displacement is the horizontal offset of the camera from the calculated center of the lane. This calculation is performed on the lane base coordinates obtained from the polynomial fitted earlier.

In [73]:
## The height, width calculations below depend on the order in which target points for perspective transform have been
## defined. In our case the points are defined as **Bottom left -> Bottom right -> Top right -> Top left**.

lane_width = abs(perspective_dst_points[0][0] - perspective_dst_points[1][0])
lane_height = abs(perspective_dst_points[0][1] - perspective_dst_points[3][1])
ym_per_pix = 30/lane_height # meters per pixel in y dimension
xm_per_pix = 3.7/lane_width # meters per pixel in x dimension
In [74]:
def find_curvature(xs_pixel,ys_pixel,at_pixel): 
    xs = xm_per_pix * xs_pixel
    ys = ym_per_pix * ys_pixel
    at = ym_per_pix * at_pixel
    
    poly = np.polyfit(ys,xs,2)
    curvature =  ((1 + (2*poly[0]*at + poly[1]) ** 2) ** 1.5) / abs(2*poly[0])
    return curvature
In [75]:
def get_displacement(base_left_x,base_right_x, base_center_x):
    center_x  = (base_left_x+base_right_x)/2.0
    displacement = abs(center_x-base_center_x) 
    
    return displacement * xm_per_pix
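
As a worked example with hypothetical values: lane bases at x = 203 and x = 1099 with the camera at the image center x = 640 give a lane center of (203 + 1099)/2 = 651, i.e. a displacement of 11 px × xm_per_pix ≈ 0.045 m.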

Maintaining Lane History

The class FitsHist maintains a history of the polynomials fitted to the last few frames. This information is used to smooth the predicted lane positions in a video, and to predict lane positions for frames in which no valid lanes are detected.

In [76]:
from collections import deque

class FitsHist: 
    def __init__(self,sz=5):
        self.history = deque(maxlen=sz)
        
    def is_empty(self):
        return len(self.history) == 0
    
    def is_not_full(self):
        return len(self.history) < self.history.maxlen
    
    def get_len(self):
        return len(self.history)
    
    def add_fit_info(self,polyfits,radii):
        self.history.append([polyfits,radii])
        
    def get_all_fit_info(self):
        if self.is_empty():
            return None
        
        return self.history
    
    def get_prev_fit_info(self):
        if self.is_empty():
            return None
        
        return self.history[-1]
    
    def popleft(self):
        return self.history.popleft()
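
A minimal usage sketch of the history buffer (the fits and radii below are hypothetical values):

In [ ]:
# Hypothetical usage of FitsHist: store one fit, then read it back.
hist = FitsHist(sz=5)
hist.add_fit_info(polyfits=([1e-4, -0.1, 300], [1e-4, -0.1, 1000]), radii=(850.0, 900.0))
prev_polys, prev_radii = hist.get_prev_fit_info()
print(prev_radii)   # -> (850.0, 900.0)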
    
In [77]:
fit_hist = None

def clr_hist():
    global fit_hist
    fit_hist = None

Core Logic

The function draw_lanes() encapsulates the high-level logic for processing one frame of a video or a single image. It applies distortion correction, masking, warping and lane search, and fits a polynomial to the output of the lane search. It maintains a history of the last 5 fitted polynomials and checks every new frame against this history to skip over outliers. It computes the radius of curvature and the displacement. The lane polynomials and the region between the lanes are painted over the input image, and the calculated information is printed near the bottom.

In [78]:
def draw_lanes(img):
    
    def check_valid(left_lane_info,right_lane_info):
        _, left_lane_candidates  = left_lane_info
        _,right_lane_candidates  = right_lane_info

        valid_left_found  = (len(left_lane_candidates[0]) > 0) &\
                            (len(left_lane_candidates[0]) == len(left_lane_candidates[1]))
        
        valid_right_found = (len(right_lane_candidates[0]) > 0) &\
                            (len(right_lane_candidates[0]) == len(right_lane_candidates[1]))
            
        return valid_left_found,valid_right_found
    
    def get_fits(left_lane_info,right_lane_info):
        if left_lane_info is not None:
            _, left_lane_candidates  = left_lane_info
            left_fit = fit_poly_to_lane(*left_lane_candidates)
        else:
            left_fit = None
            
        if right_lane_info is not None:
            _,right_lane_candidates  = right_lane_info
            right_fit = fit_poly_to_lane(*right_lane_candidates)
        else:
            right_fit = None
        
        return  left_fit,right_fit
    
    def get_raw_radii(left_lane_info,right_lane_info,y_bottom):
        
        radius_left, radius_right = None, None
        
        if left_lane_info is not None:
            _,(left_lane_x,left_lane_y) = left_lane_info
            radius_left  = find_curvature(left_lane_x,left_lane_y,y_bottom)
        
        if right_lane_info is not None:
            _,(right_lane_x,right_lane_y) = right_lane_info
            radius_right = find_curvature(right_lane_x,right_lane_y,y_bottom)
            
        return (radius_left,radius_right)
    
    
    def get_smoothed_predictions(history,ys):
        xs_left, xs_right = np.zeros_like(ys), np.zeros_like(ys)
        
        all_fit_infos = history.get_all_fit_info()
        for fit_info in all_fit_infos:
            fits = fit_info[0]
            xs_left += eval_quadratic(fits[0],ys)
            xs_right += eval_quadratic(fits[1],ys)
       
        xs_left  = (xs_left / len(all_fit_infos)).astype(np.int32)
        xs_right = (xs_right / len(all_fit_infos)).astype(np.int32)
        
        return xs_left,xs_right
    
    def get_smoothed_radius(history):
        all_fit_infos = history.get_all_fit_info()
        
        radius_left, radius_right = 0.0, 0.0
        for fit_info in all_fit_infos:
            radii = fit_info[1]
            
            radius_left += radii[0]
            radius_right += radii[1]
            
        radius_left = radius_left / len(all_fit_infos)
        radius_right = radius_right/ len(all_fit_infos)
        
        return (radius_left+radius_right)/2.
        
        
    
    def outlier_detect(frame,history,left_poly,right_poly):
        ys  = np.array(range(frame.shape[0]),dtype=np.int32)
        
        left_xs  = eval_quadratic(left_poly,ys)
        right_xs = eval_quadratic(right_poly,ys)
        
        lane_width = np.abs(left_xs-right_xs)
        
        # Get minimum and maximum widths in meters.
        min_width = np.min(lane_width) * xm_per_pix
        max_width = np.max(lane_width) * xm_per_pix
        
        if min_width < 3 or max_width > 4:
            return True
        
        hist_left_xs, hist_right_xs = get_smoothed_predictions(history,ys)
        
        deviations_left = np.abs(left_xs-hist_left_xs)
        deviations_right = np.abs(right_xs-hist_right_xs)
        
        left_max_dev = np.max(deviations_left)
        right_max_dev = np.max(deviations_right)
        
        if  left_max_dev > 100 or right_max_dev > 100:
            return True
        
        return False
    

   
    # Apply camera distortion correction.
    undistorted = cv2.undistort(img,cameraMatrix,distCoeffs)
    
    # Extract mask with lane pixels.
    thresh_params = get_thresholds()
    binary_image = get_lane_mask(undistorted,thresh_params)
    
    # Switch to bird's eye view of the lanes.
    perspective_corrected = warp(binary_image,M) 
    
    # Y coordinate at the bottom of the frame.
    y_bottom = perspective_corrected.shape[0]-1
    
    # Get coordinates of potential lane pixels.
    global fit_hist
    
    if fit_hist is None or fit_hist.is_empty():
        if fit_hist is None:
            fit_hist = FitsHist()
        
        # Get lane pixels.
        lane_infos = find_lanes(perspective_corrected,None)
    
        valid_left_found, valid_right_found = check_valid(*lane_infos)
        if not valid_left_found or not valid_right_found:
            raise Exception("No lane history exists and failed to identify new lanes.")
        
        # Fit a polynomial to the lane pixels.
        left_poly, right_poly = get_fits(*lane_infos)
        
        # Calculate radii from raw data
        radii = get_raw_radii(*lane_infos,y_bottom)
        
        # Add new fits to history.
        fit_hist.add_fit_info((left_poly, right_poly),radii)
    
        
        
    else:
        prev_fit_info = fit_hist.get_prev_fit_info()
        prev_polys = prev_fit_info[0]
        prev_radii = prev_fit_info[1] 
        
        assert prev_fit_info is not None and prev_polys is not None
        

        lane_infos = find_lanes(perspective_corrected,prev_polys)
        valid_left_found, valid_right_found = check_valid(*lane_infos)
        
        # Fit polynomials to left and right lane's coordinates.
        # Calculate radii from raw data
        
        if valid_left_found and valid_right_found:
            left_poly,right_poly = get_fits(*lane_infos)
            left_radius,right_radius = get_raw_radii(*lane_infos,y_bottom)
        
        elif valid_left_found:
            if __DEBUG__ == True:
                print("Reusing right lane info from history.")
            left_poly,_ = get_fits(lane_infos[0],None)
            right_poly  = prev_polys[1]
            
            left_radius,_ = get_raw_radii(lane_infos[0],None,y_bottom)
            right_radius  = prev_radii[1]

        elif valid_right_found:
            if __DEBUG__ == True:
                print("Reusing left lane fit from history.")
            _,right_poly = get_fits(None,lane_infos[1])
            left_poly  = prev_polys[0]
            
            _,right_radius = get_raw_radii(None,lane_infos[1],y_bottom)
            left_radius = prev_radii[0]

        else:
            if __DEBUG__ == True:
                print("Reusing both lane fits from history.")
            left_poly, right_poly = prev_polys
            left_radius, right_radius = prev_radii
        
        # Skip adding outlier fits to history.   
        if outlier_detect(perspective_corrected,fit_hist,left_poly,right_poly):
            if fit_hist.get_len()<=2:
                fit_hist.add_fit_info((left_poly, right_poly), (left_radius, right_radius))
            else:
                fit_hist.popleft()
                if __DEBUG__ == True:
                    print("Outlier frame dropped!")
        else:
            fit_hist.add_fit_info((left_poly, right_poly), (left_radius, right_radius))
            
        
        
    
    # Generate y coordinates for the entire frame.
    ys = np.array(range(1,perspective_corrected.shape[0]),dtype=np.int32)
    
    # Get smoothed predictions for the x coordinates of both lanes.
    xs_left, xs_right = get_smoothed_predictions(fit_hist,ys)       
    
    # Combine x, y into points for individual lanes and polygon area
    # between the lanes.
    left_pts  = [(x,y) for (x,y) in zip(xs_left, ys)]
    right_pts = [(x,y) for (x,y) in zip(xs_right,ys)]
    all_pts = [pt for pt in left_pts]
    all_pts.extend([pt for pt in reversed(right_pts)])
    
    # Get radius of curvature and estimated displacement of the camera
    # from center of the lanes.
    x_center = perspective_corrected.shape[1] / 2
    base_left_x = xs_left[np.where(ys==y_bottom)[0]][0]
    base_right_x = xs_right[np.where(ys==y_bottom)[0]][0]
    displacement = get_displacement(base_left_x,base_right_x,x_center)
    
    radius = get_smoothed_radius(fit_hist)
    
    # Add radius and displacement to the frame image.
    cv2.putText(undistorted,
                "Radius: {:0.2f} m".format(radius),
                (400,650),
                cv2.FONT_HERSHEY_PLAIN,
                 3.,
                [0,255,0],5)

    cv2.putText(undistorted,
                "Displacement: {:0.2f} cm".format(displacement*100),
                (400,680),
                cv2.FONT_HERSHEY_PLAIN,
                 2.5,
                [0,255,0],5)
    
    # Get an RGB copy of the bird's eye view and do all lane drawing and
    # region highlighting on this copy.
    overlay = np.dstack((perspective_corrected,perspective_corrected,perspective_corrected)) * 255
    
    cv2.fillPoly(overlay,np.array([all_pts],dtype = np.int32),[255,255,0])
    draw_polygon(overlay,left_pts,color=[0,255,0],isClosed=False,thickness=30)
    draw_polygon(overlay,right_pts,color=[0,0,255],isClosed=False,thickness=30)
     
    # Unwarp our copy of the bird's eye view. This will automatically unwarp all
    # the drawing we did above.
    unwarp = warp(overlay,Minv)
    
    # Overlay drawings on top of the frame image.
    final = cv2.addWeighted(undistorted,1.0,unwarp,0.3,0.0)
    
    return final

Video Processing Pipeline

In [81]:
from moviepy.editor import VideoFileClip
clr_hist()
in_clip = VideoFileClip('project_video.mp4')

out_filename = 'processed-project_video.mp4'
out_clip = in_clip.fl_image(draw_lanes)

out_clip.write_videofile(out_filename,audio=False)

clr_hist()
[MoviePy] >>>> Building video processed-project_video.mp4
[MoviePy] Writing video processed-project_video.mp4
100%|█████████▉| 1260/1261 [10:00<00:00,  2.19it/s]
[MoviePy] Done.
[MoviePy] >>>> Video ready: processed-project_video.mp4